### File: 3_image_regressions
### Purpose: Replicate Figure 1(B), 4, plus all results in Appendices D and E
### Created: 12/2/2024


#######################
## required packages
#######################
require(tidyverse)
require(betaDelta)
require(stargazer)


#######################
### paths
#######################
main <- getwd()

data_dir <- str_c(main, "/data/")

results_dir <- str_c(main, "/results/")

app_d <- str_c(results_dir, "/appendix_d/")
app_e <- str_c(results_dir, "/appendix_e/")

#######################
### Read in the data 
#######################

master <- read_csv(str_c(data_dir, "all_tweets_w_labeled_images.csv"))


#######################
### Figure 4A
#######################

## Beginning with this replication, then will pick up 1A along the way
## This figure requires the running of multiple OLS regressions,
## then standardizing the output

## pooled formula
pool_formula_all <- log_end_rts ~ avg_enthus +
  avg_anxiety + avg_avers + avg_sad + avg_disgust + 
  avg_socid_look + protest_avg + humor_avg + irony_avg +
  leader_avg + socsymb_avg +
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
pool_result_all <- lm(pool_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
pool_result_betaD <- BetaDelta(pool_result_all, type = "mvn",
                               alpha = c(0.05, 0.001))

## tidy the output
## Want to get the variables and the conf ints from the beta object
pool_tidy_all_betaD <- summary(pool_result_betaD) %>% 
  as.data.frame() %>% 
  # want rowname as column
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Pooled") %>% 
  # rename the low and high CI values
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## rep formula
rep_formula_all <- log_end_rts ~ avg_enthus_rep +
  avg_anxiety_rep + avg_avers_rep + avg_sad_rep + 
  avg_disgust_rep + avg_socid_look_rep + protest_avg_rep +
  humor_avg_rep + irony_avg_rep + leader_avg_rep + 
  socsymb_avg_rep +  
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
rep_result_all <- lm(rep_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
rep_result_betaD <- BetaDelta(rep_result_all, type = "mvn",
                              alpha = c(0.05, 0.001))

## Tidy the standardized stuff
rep_tidy_all_betaD <- summary(rep_result_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Republican") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)


#############
## dem formula
dem_formula_all <- log_end_rts ~ avg_enthus_dem +
  avg_anxiety_dem + avg_avers_dem + avg_sad_dem + 
  avg_disgust_dem + avg_socid_look_dem + protest_avg_dem +
  humor_avg_dem + irony_avg_dem + leader_avg_dem + 
  socsymb_avg_dem +  
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
dem_result_all <- lm(dem_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
dem_result_betaD <- BetaDelta(dem_result_all, type = "mvn",
                              alpha = c(0.05, 0.001))

## Tidy the standardized stuff
dem_tidy_all_betaD <- summary(dem_result_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Democrat") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## female formula
female_formula_all <- log_end_rts ~ avg_enthus_female +
  avg_anxiety_female + avg_avers_female + avg_sad_female + 
  avg_disgust_female + avg_socid_look_female + protest_avg_female +
  humor_avg_female + irony_avg_female + leader_avg_female + 
  socsymb_avg_female + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
female_result_all <- lm(female_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
female_result_betaD <- BetaDelta(female_result_all, type = "mvn",
                                 alpha = c(0.05, 0.001))

## Tidy the standardized stuff
female_tidy_all_betaD <- summary(female_result_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Female") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)


#############
## male formula
male_formula_all <- log_end_rts ~ avg_enthus_male +
  avg_anxiety_male + avg_avers_male + avg_sad_male + 
  avg_disgust_male + avg_socid_look_male + protest_avg_male +
  humor_avg_male + irony_avg_male + leader_avg_male + 
  socsymb_avg_male + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
male_result_all <- lm(male_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
male_result_betaD <- BetaDelta(male_result_all, type = "mvn",
                               alpha = c(0.05, 0.001))

## Tidy the standardized stuff
male_tidy_all_betaD <- summary(male_result_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Male") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)


###########################
### reshaping the output
## make sure to use the betaD results, 
output_tib <- bind_rows(pool_tidy_all_betaD,
                        rep_tidy_all_betaD,
                        dem_tidy_all_betaD,
                        female_tidy_all_betaD,
                        male_tidy_all_betaD) %>% 
  ## dropping the controls/intercept, which aren't plotted
  filter(!(Variable %in% c("(Intercept)", "start_followers",
                           "tweet_hour")),
         !(str_detect(Variable, "as.character")),
         !(str_detect(Variable, "hashtag")),
         !(str_detect(Variable, "type_tweet"))) %>% 
  ## unified versions of the variable names
  mutate(term_mod = case_when(str_detect(Variable, "_rep") ~ str_sub(Variable, 
                                                                     end = -5),
                              str_detect(Variable, "_dem") ~ str_sub(Variable, 
                                                                     end = -5),
                              str_detect(Variable, "_female") ~ str_sub(Variable, 
                                                                        end = -8),
                              str_detect(Variable, "_male") ~ str_sub(Variable, 
                                                                      end = -6),
                              TRUE ~ Variable)) %>% 
  ## more human-readable variable names
  mutate(term_nice = case_when(term_mod == "socsymb_avg" ~ "Social Symbol",
                               term_mod == "protest_avg" ~ "Protest",
                               term_mod == "leader_avg" ~ "Leader",
                               term_mod == "irony_avg" ~ "Irony",
                               term_mod == "humor_avg" ~ "Humor",
                               term_mod == "avg_socid_look" ~ "Looks Similar",
                               term_mod == "avg_enthus" ~ "Enthusiasm",
                               term_mod == "avg_sad" ~ "Sadness",
                               term_mod == "avg_disgust" ~ "Disgust",
                               term_mod == "avg_avers" ~ "Aversion",
                               term_mod == "avg_anxiety" ~ "Anxiety",
                               TRUE ~ term_mod)) %>% 
  mutate(term_nice = fct_relevel(term_nice,
                                 "Social Symbol",
                                 "Looks Similar",
                                 "Leader", "Protest",
                                 "Irony", "Humor")) %>% 
  mutate(Annotations = fct_relevel(Annotations,
                                   "Pooled",
                                   "Democrat",
                                   "Republican",
                                   "Female",
                                   "Male"))

## add an indicator for significant or not
coef_differ <- output_tib %>% 
  mutate(sig_05 = as_factor(if_else(p<0.05, 1, 0)))

## plot it
ggplot(coef_differ, aes(term_nice, est)) +
  geom_point(aes(color = Annotations, 
                 shape = sig_05,
                 alpha = sig_05),
             position = position_dodge(width=0.4),
             size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high,
                    color = Annotations,
                    alpha = sig_05), 
                width = 0.2, 
                linewidth = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  #scale_color_grey() +
  #  labs(title = "Standardized coefficients from \n linear models of RTs") +
  ylab("Standardized regression coefficients \n predicting retweets") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
#ggsave(str_c(results_dir, "4A_coeff_diff_std_logDV.pdf"), 
#       width = 20, height = 20, units = "cm")

# TIFF for publication
ggsave(str_c(results_dir, "4A_coeff_diff_std_logDV.tiff"), 
       width = 20, height = 25, units = "cm",
       device = "tiff", dpi = 300)


###############################
#### Table the results for the regressions
###############################

#### This is Appendix D, Figure 2 results
stargazer(male_result_all, title = "Male Labels",
          label = "male_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Fig2_male_result_all.tex"))

stargazer(female_result_all, title = "Female Labels",
          label = "female_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Fig2_female_result_all.tex"))

stargazer(dem_result_all, title = "Democratic Labels",
          label = "dem_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Fig2_dem_result_all.tex"))

stargazer(rep_result_all, title = "Republican Labels",
          label = "rep_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Fig2_rep_result_all.tex"))

stargazer(pool_result_all, title = "Pooled Labels",
          label = "pool_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Fig2_pool_result_all.tex"))

###############################
### While we are here, make Appendix E, Figure 4
###############################

## Same as 4A figure, with 99% CIS
## plot it
ggplot(coef_differ, aes(term_nice, est)) +
  geom_point(aes(color = Annotations, 
                 shape = sig_05,
                 alpha = sig_05),
             position = position_dodge(width=0.4),
             size = 3) +
  geom_errorbar(aes(ymin = conf.low.99, ymax = conf.high.99,
                    color = Annotations,
                    alpha = sig_05), 
                width = 0.2, 
                linewidth = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  ylab("Standardized regression coefficients \n predicting retweets") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it -- as PDF for appendix
ggsave(str_c(app_e, "appE_4_coeff_diff_std_logDV_99.pdf"), 
       width = 20, height = 20, units = "cm")

######################
### Figure 1B 
#####################

## highlights the difference in male/female annotations,
## pulling from same results tables

### the colors for the gender plots
cols_gen <- c("Female" = "purple", "Male" = "orange")
## the linetypes for gender plots
lines_gen <- c("Female" = "dashed", "Male" = "solid")

## relevant data
coef_differ_gender <- output_tib %>% 
  filter(Annotations %in% c("Male", "Female")) %>% 
  mutate(sig_05 = as_factor(if_else(p<0.05, 1, 0)))

## plot it
ggplot(coef_differ_gender, aes(term_nice, est)) +
  geom_point(aes(color = Annotations), 
             size = 3, position = position_dodge2(reverse = TRUE,
                                                  width=0.4)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high,
                    color = Annotations, linetype = Annotations), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge2(reverse = TRUE, 
                                           width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  scale_color_manual(values = cols_gen) +
  scale_linetype_manual(values = lines_gen) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  ylab("Standardized regression coefficients \npredicting retweets") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
## if you want a PDF
#ggsave(str_c(results_dir, "1B_coeff_diff_std_logDV_gender.pdf"), 
#       width = 20, height = 20, units = "cm")

## TIFF for publication
ggsave(str_c(results_dir, "1B_coeff_diff_std_logDV_gender.tiff"), 
       width = 20, height = 20, units = "cm",
       device = "tiff", dpi = 300)


###############################
### While we are here, make Appendix E, Figure 3
###############################

## Same as 1B figure, with 99% CIS

## plot it
ggplot(coef_differ_gender, aes(term_nice, est)) +
  geom_point(aes(color = Annotations), 
             size = 3, position = position_dodge2(reverse = TRUE,
                                                  width=0.4)) +
  geom_errorbar(aes(ymin = conf.low.99, ymax = conf.high.99,
                    color = Annotations, linetype = Annotations), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge2(reverse = TRUE, 
                                           width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  scale_color_manual(values = cols_gen) +
  scale_linetype_manual(values = lines_gen) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  ylab("Standardized regression coefficients predicting retweets") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
ggsave(str_c(app_e, "appE_3_coeff_diff_std_logDV_gender_99.pdf"), 
       width = 20, height = 20, units = "cm")

#######################
### Figure 4B
#######################

## shows the results from including both partisans in formula
both_part_formula_all <- log_end_rts ~ avg_enthus_dem +
  avg_anxiety_dem + avg_avers_dem + avg_sad_dem + 
  avg_disgust_dem + avg_socid_look_dem + protest_avg_dem +
  humor_avg_dem + irony_avg_dem + leader_avg_dem + 
  socsymb_avg_dem + avg_enthus_rep +
  avg_anxiety_rep + avg_avers_rep + avg_sad_rep + 
  avg_disgust_rep + avg_socid_look_rep + protest_avg_rep +
  humor_avg_rep + irony_avg_rep + leader_avg_rep + 
  socsymb_avg_rep +  
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
both_part_result_all <- lm(both_part_formula_all, data = master)

#### betaDelta for standardized coefs and CIs
both_part_result_betaD <- BetaDelta(both_part_result_all, type = "mvn",
                                    alpha = c(0.05, 0.001))

## Tidy the standardized stuff
both_part_tidy_all_betaD <- summary(both_part_result_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("Variable") %>% 
  mutate(Annotations = "Both Partisan") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

coefs_both_partisan  <- both_part_tidy_all_betaD %>% 
  ## dropping the controls/intercept, which aren't plotted
  filter(!(Variable %in% c("(Intercept)", "start_followers",
                           "tweet_hour")),
         !(str_detect(Variable, "as.character")),
         !(str_detect(Variable, "hashtag")),
         !(str_detect(Variable, "type_tweet"))) %>% 
  ## unified versions of the variable names
  mutate(term_mod = case_when(str_detect(Variable, "_rep") ~ str_sub(Variable, 
                                                                     end = -5),
                              str_detect(Variable, "_dem") ~ str_sub(Variable, 
                                                                     end = -5),
                              str_detect(Variable, "_female") ~ str_sub(Variable, 
                                                                        end = -8),
                              str_detect(Variable, "_male") ~ str_sub(Variable, 
                                                                      end = -6),
                              TRUE ~ Variable)) %>% 
  ## more human-readable variable names
  mutate(term_nice = case_when(term_mod == "socsymb_avg" ~ "Social Symbol",
                               term_mod == "protest_avg" ~ "Protest",
                               term_mod == "leader_avg" ~ "Leader",
                               term_mod == "irony_avg" ~ "Irony",
                               term_mod == "humor_avg" ~ "Humor",
                               term_mod == "avg_socid_look" ~ "Looks Similar",
                               term_mod == "avg_enthus" ~ "Enthusiasm",
                               term_mod == "avg_sad" ~ "Sadness",
                               term_mod == "avg_disgust" ~ "Disgust",
                               term_mod == "avg_avers" ~ "Aversion",
                               term_mod == "avg_anxiety" ~ "Anxiety",
                               TRUE ~ term_mod)) %>% 
  mutate(term_nice = fct_relevel(term_nice,
                                 "Social Symbol",
                                 "Looks Similar",
                                 "Leader", "Protest",
                                 "Irony", "Humor")) %>% 
  ## create variable for Party, then re-level
  mutate(Party = if_else(str_sub(Variable, -4) == "_dem",
                         "Democrat",
                         "Republican")) %>% 
  mutate(Party = fct_relevel(Party, "Republican", "Democrat"))

### the colors for the partisan plots
cols <- c("Democrat" = "blue3", "Republican" = "red3")
## the linetypes for partisan plots
lines <- c("Democrat" = "dashed", "Republican" = "solid")
## the alphas for partisan plots
alphas <- c("Democrat" = .7, "Republican" = .3)

## plot it
ggplot(coefs_both_partisan, aes(term_nice, est)) +
  geom_point(aes(color = Party,
                 alpha = Party),
             size = 3, 
             position = position_dodge(width=0.4)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high,
                    color = Party,
                    linetype = Party,
                    alpha = Party), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  scale_color_manual(values = cols) +
  scale_alpha_manual(values = alphas) +
  scale_linetype_manual(values = lines) +
  coord_flip() +
  theme_classic() +
  #  labs(title = "Coefficients from linear models of RTs") +
  ylab("Standardized regression coefficients \n predicting retweets") +
  xlab("Image variable") +
  guides(color = guide_legend(reverse=T),
         alpha = guide_legend(reverse=T),
         linetype = guide_legend(reverse=T)) +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
## if you want a PDF
#ggsave(str_c(results_dir, "4B_coeff_diff_std_logDV_both_part.pdf"), 
#       width = 20, height = 20, units = "cm")

# TIFF for publication
ggsave(str_c(results_dir, "4B_coeff_diff_std_logDV_both_part.tiff"), 
       width = 20, height = 25, units = "cm",
       device = "tiff", dpi = 300)

###############################
#### While here, create Appendix D, Table 6 results
###############################

stargazer(both_part_result_all, title = "Modeling Partisan Labels Separately",
          label = "both_part_result_all", single.row = T,
          font.size = "scriptsize",
          out = str_c(app_d, "appD_Tab6_both_part_result_all.tex"))

###############################
#### While here, create Appendix E, Figure 5 results
###############################

## replicates figure 4B
## with 99% CIS
## plot it
ggplot(coefs_both_partisan, aes(term_nice, est)) +
  geom_point(aes(color = Party,
                 alpha = Party),
             size = 3, 
             position = position_dodge(width=0.4)) +
  geom_errorbar(aes(ymin = conf.low.99, ymax = conf.high.99,
                    color = Party,
                    linetype = Party,
                    alpha = Party), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  scale_color_manual(values = cols) +
  scale_alpha_manual(values = alphas) +
  scale_linetype_manual(values = lines) +
  coord_flip() +
  theme_classic() +
  #  labs(title = "Coefficients from linear models of RTs") +
  ylab("Standardized regression coefficients \n predicting retweets") +
  xlab("Image variable") +
  guides(color = guide_legend(reverse=T),
         alpha = guide_legend(reverse=T),
         linetype = guide_legend(reverse=T)) +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
ggsave(str_c(app_e, "appE_5_coeff_diff_std_logDV_both_part_99.pdf"), 
       width = 20, height = 20, units = "cm")


#######################
### Figure 4C
#######################

## reweight the variable, looping over a range of Republican weights
## as a loop over a range of Republican values

rep_values = c(0.1, 0.21, 0.6, 1)

## rw formula
rw_formula_all <- log_end_rts ~ rw_enthus +
  rw_anxiety + # rw_avers + 
  rw_sad + rw_disgust + 
  rw_socid_look + rw_protest + rw_humor + rw_irony +
  rw_leader + rw_socsymb +
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## output tibble
rw_output <- vector("list", length(rep_values))
rw_output_old <- vector("list", length(rep_values))

## loop
for(i in seq_along(rep_values)){
  
  pct_rep <- rep_values[i]
  pct_dem <- 1-pct_rep
  
  master_reweight <- master %>% 
    mutate(rw_enthus = (pct_dem*avg_avers_dem) + (pct_rep*avg_avers_rep),
           rw_anxiety = (pct_dem*avg_anxiety_dem) + (pct_rep*avg_anxiety_rep),
           rw_avers = (pct_dem*avg_avers_dem) + (pct_rep*avg_avers_rep),
           rw_sad = (pct_dem*avg_sad_dem) + (pct_rep*avg_sad_rep),
           rw_disgust = (pct_dem*avg_disgust_dem) + (pct_rep*avg_disgust_rep),
           rw_socid_look= (pct_dem*avg_socid_look_dem) + (pct_rep*avg_socid_look_rep),
           rw_protest = (pct_dem*protest_avg_dem) + (pct_rep*protest_avg_rep),
           rw_humor = (pct_dem*humor_avg_dem) + (pct_rep*humor_avg_rep),
           rw_irony = (pct_dem*irony_avg_dem) + (pct_rep*irony_avg_rep),
           rw_leader = (pct_dem*leader_avg_dem) + (pct_rep*leader_avg_rep),
           rw_socsymb = (pct_dem*socsymb_avg_dem) + (pct_rep*socsymb_avg_rep))
  
  ## ols
  rw_result_all <- lm(rw_formula_all, data = master_reweight)
  
  ## standardized
  rw_result_all_betaD2 <- BetaDelta(rw_result_all, type = "mvn", 
                                    alpha = c(0.05, 0.001))
  
  ## Tidy the standardized stuff
  rw_result_all_tidy_betaD2 <- summary(rw_result_all_betaD2) %>% 
    as.data.frame() %>% 
    rownames_to_column("term") %>% 
    mutate(Annotations = rep_values[i]) %>% 
    rename(conf.low = `2.5%`, conf.high = `97.5%`,
           conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)
  
  #print(rw_tidy_all)
  ## save that output
  rw_output[[i]] <- rw_result_all_tidy_betaD2
  rw_output_old[[i]] <- rw_result_all
  
}

######################
### reshaping the output
rw_output_tib <- bind_rows(rw_output) %>%  
  filter(!(term %in% c("(Intercept)", "start_followers",
                       "tweet_hour")),
         !(str_detect(term, "as.character")),
         !(str_detect(term, "hashtag")),
         !(str_detect(term, "type_tweet")))%>% 
  mutate(term_nice = case_when(term == "rw_socsymb" ~ "Social Symbol",
                               term == "rw_protest" ~ "Protest",
                               term == "rw_leader" ~ "Leader",
                               term == "rw_irony" ~ "Irony",
                               term == "rw_humor" ~ "Humor",
                               term == "rw_socid_look" ~ "Looks Similar",
                               term == "rw_enthus" ~ "Enthusiasm",
                               term == "rw_sad" ~ "Sadness",
                               term == "rw_disgust" ~ "Disgust",
                               term == "rw_avers" ~ "Aversion",
                               term == "rw_anxiety" ~ "Anxiety",
                               TRUE ~ term)) %>% 
  mutate(term_nice = fct_relevel(term_nice,
                                 "Social Symbol",
                                 "Looks Similar",
                                 "Leader", "Protest",
                                 "Irony", "Humor"))

## plot it
ggplot(rw_output_tib, aes(term_nice, est)) +
  geom_point(aes(color = as.factor(Annotations)),
             #    alpha = as.factor(Annotations)),
             size = 3, 
             position = position_dodge(width=0.4)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, 
                    color = as.factor(Annotations)), 
                width = 0.2,
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  labs(color = "Proportion \nRepublican") +
  ylab("Standardized regression coefficients\n predicting retweets") +
  xlab("Image Variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))


## Save it
## if you want a PDF
#ggsave(str_c(results_dir, "4C_coeff_diff_std_logDV_rw.pdf"), width = 20, 
#       height = 20, units = "cm")

# TIFF for publication
ggsave(str_c(results_dir, "4C_coeff_diff_std_logDV_rw.tiff"), 
       width = 20, height = 25, units = "cm",
       device = "tiff", dpi = 300)

#######################
### While we are here, table the results for Appendix D, Table 7
#######################

## extract relevant output
m1 <- rw_output_old[[1]]
m2 <- rw_output_old[[2]]
m3 <- rw_output_old[[3]]
m4 <- rw_output_old[[4]]

stargazer(m1, m2, m3, m4, 
          title = "Regression Results with Reweighted Measures",
          label = "rw_regression_table", single.row = T,
          font.size = "scriptsize",
          column.labels=c("Rep = 0.1", "Rep = 0.21", "Rep = 0.6", "Rep = 1"),
          out = str_c(app_d, "appD_Tab7_rw_regression_table.tex"))

#######################
### While we are here, make the robustness check for Appendix E, Figure 6
#######################

## plot same figure as 4C with 99% intervals
ggplot(rw_output_tib, aes(term_nice, est)) +
  geom_point(aes(color = as.factor(Annotations)),
             size = 3, 
             position = position_dodge(width=0.4)) +
  geom_errorbar(aes(ymin = conf.low.99, ymax = conf.high.99, 
                    color = as.factor(Annotations)), 
                width = 0.2,
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  labs(color = "Proportion \nRepublican") +
  ylab("Standardized regression coefficients\n predicting retweets") +
  xlab("Image Variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))


## Save it
ggsave(str_c(app_e, "appE_6_coeff_diff_std_logDV_rw_99.pdf"), width = 20, 
       height = 20, units = "cm")


#######################
### Appendix E, Figure 7
#######################

## re-run the models with only the "content" variables

## remove emotions, humor and irony
## pooled formula
pool_formula_content <- log_end_rts ~ avg_socid_look + protest_avg +
  leader_avg + socsymb_avg +
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
pool_result_content <- lm(pool_formula_content, data = master)

## standardized
pool_result_content_betaD <- BetaDelta(pool_result_content, type = "mvn",
                                       alpha = c(0.05, 0.001))

## Tidy the standardized stuff
pool_result_content_tidy_betaD <- summary(pool_result_content_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Pooled") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)


#############
## rep formula
rep_formula_content <- log_end_rts ~ avg_socid_look_rep + protest_avg_rep +
  leader_avg_rep + 
  socsymb_avg_rep +  
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
rep_result_content <- lm(rep_formula_content, data = master)

## standardized
rep_result_content_betaD <- BetaDelta(rep_result_content, type = "mvn",
                                      alpha = c(0.05, 0.001))
## Tidy the standardized stuff
rep_result_content_tidy_betaD <- summary(rep_result_content_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Republican") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## dem formula
dem_formula_content <- log_end_rts ~ avg_socid_look_dem + protest_avg_dem +
  leader_avg_dem +  
  socsymb_avg_dem +  
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
dem_result_content <- lm(dem_formula_content, data = master)

## standardized
dem_result_content_betaD <- BetaDelta(dem_result_content, type = "mvn",
                                      alpha = c(0.05, 0.001))

## Tidy the standardized stuff
dem_result_content_tidy_betaD <- summary(dem_result_content_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Democrat") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## female formula
female_formula_content <- log_end_rts ~ avg_socid_look_female + protest_avg_female +
  leader_avg_female + 
  socsymb_avg_female + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
female_result_content <- lm(female_formula_content, data = master)

## standardized
fem_result_content_betaD <- BetaDelta(female_result_content, type = "mvn",
                                      alpha = c(0.05, 0.001))

## Tidy the standardized stuff
fem_result_content_tidy_betaD <- summary(fem_result_content_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Female") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## male formula
male_formula_content <- log_end_rts ~ avg_socid_look_male + protest_avg_male +
  leader_avg_male + 
  socsymb_avg_male + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
male_result_content <- lm(male_formula_content, data = master)

## standardized
male_result_content_betaD <- BetaDelta(male_result_content, type = "mvn",
                                       alpha = c(0.05, 0.001))

## Tidy the standardized stuff
male_result_content_tidy_betaD <- summary(male_result_content_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Male") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

### reshaping the output
output_tib_content <- bind_rows(pool_result_content_tidy_betaD,
                                rep_result_content_tidy_betaD,
                                dem_result_content_tidy_betaD,
                                fem_result_content_tidy_betaD,
                                male_result_content_tidy_betaD) %>% 
  filter(!(term %in% c("(Intercept)", "start_followers",
                       "tweet_hour")),
         !(str_detect(term, "as.character")),
         !(str_detect(term, "hashtag")),
         !(str_detect(term, "type_tweet"))) %>% 
  mutate(term_mod = case_when(str_detect(term, "_rep") ~ str_sub(term, 
                                                                 end = -5),
                              str_detect(term, "_dem") ~ str_sub(term, 
                                                                 end = -5),
                              str_detect(term, "_female") ~ str_sub(term, 
                                                                    end = -8),
                              str_detect(term, "_male") ~ str_sub(term, 
                                                                  end = -6),
                              TRUE ~ term)) %>% 
  mutate(term_nice = case_when(term_mod == "socsymb_avg" ~ "Social Symbol",
                               term_mod == "protest_avg" ~ "Protest",
                               term_mod == "leader_avg" ~ "Leader",
                               term_mod == "avg_socid_look" ~ "Looks Similar",
                               TRUE ~ term_mod)) %>% 
  mutate(term_nice = fct_relevel(term_nice,
                                 "Social Symbol",
                                 "Looks Similar",
                                 "Leader", "Protest")) %>% 
  mutate(Annotations = fct_relevel(Annotations,
                                   "Pooled",
                                   "Democrat",
                                   "Republican",
                                   "Female",
                                   "Male"))

## add indicator for significant or not
coef_differ_content <- output_tib_content %>%
  mutate(sig_05 = as_factor(if_else(p<0.05, 1, 0)))

## plot it
ggplot(coef_differ_content, aes(term_nice, est)) +
  geom_point(aes(color = Annotations, 
                 shape = sig_05,
                 alpha = sig_05),
             position = position_dodge(width=0.4), 
             size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high,
                    color = Annotations,
                    alpha = sig_05), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  ylab("Standardized regression coefficients \n predicting retweets (content only)") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
ggsave(str_c(app_e, "appE_7_content_coeff_diff_std_logDV.pdf"), 
       width = 20, height = 20, units = "cm")


#######################
### Appendix E, Figure 18
#######################

## re-run the models with only the "reaction" variables
## pooled formula
pool_formula_reaction <- log_end_rts ~ avg_enthus +
  avg_anxiety + avg_avers + avg_sad + avg_disgust + 
  humor_avg + irony_avg +
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
pool_result_reaction <- lm(pool_formula_reaction, data = master)

## standardized
pool_result_reaction_betaD <- BetaDelta(pool_result_reaction, type = "mvn",
                                        alpha = c(0.05, 0.001))

## Tidy the standardized stuff
pool_result_reaction_tidy_betaD <- summary(pool_result_reaction_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Pooled") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## rep formula
rep_formula_reaction <- log_end_rts ~ avg_enthus_rep +
  avg_anxiety_rep + avg_avers_rep + avg_sad_rep + 
  avg_disgust_rep + 
  humor_avg_rep + irony_avg_rep + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
rep_result_reaction <- lm(rep_formula_reaction, data = master)

## standardized
rep_result_reaction_betaD <- BetaDelta(rep_result_reaction, type = "mvn",
                                       alpha = c(0.05, 0.001))

## Tidy the standardized stuff
rep_result_reaction_tidy_betaD <- summary(rep_result_reaction_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Republican") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## dem formula
dem_formula_reaction <- log_end_rts ~ avg_enthus_dem +
  avg_anxiety_dem + avg_avers_dem + avg_sad_dem + 
  avg_disgust_dem + 
  humor_avg_dem + irony_avg_dem + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
dem_result_reaction <- lm(dem_formula_reaction, data = master)

## standardized
dem_result_reaction_betaD <- BetaDelta(dem_result_reaction, type = "mvn",
                                       alpha = c(0.05, 0.001))

## Tidy the standardized stuff
dem_result_reaction_tidy_betaD <- summary(dem_result_reaction_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Democrat") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## female formula
female_formula_reaction <- log_end_rts ~ avg_enthus_female +
  avg_anxiety_female + avg_avers_female + avg_sad_female + 
  avg_disgust_female + 
  humor_avg_female + irony_avg_female + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
female_result_reaction <- lm(female_formula_reaction, data = master)

## standardized
fem_result_reaction_betaD <- BetaDelta(female_result_reaction, type = "mvn",
                                       alpha = c(0.05, 0.001))

## Tidy the standardized stuff
fem_result_reaction_tidy_betaD <- summary(fem_result_reaction_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Female") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

#############
## male formula
male_formula_reaction <- log_end_rts ~ avg_enthus_male +
  avg_anxiety_male + avg_avers_male + avg_sad_male + 
  avg_disgust_male + 
  humor_avg_male + irony_avg_male + 
  start_followers + type_tweet + tweet_hour + 
  as.character(tweet_wday) + hashtag

## ols
male_result_reaction <- lm(male_formula_reaction, data = master)

## standardized
male_result_reaction_betaD <- BetaDelta(male_result_reaction, type = "mvn",
                                        alpha = c(0.05, 0.001))

## Tidy the standardized stuff
male_result_reaction_tidy_betaD <- summary(male_result_reaction_betaD) %>% 
  as.data.frame() %>% 
  rownames_to_column("term") %>% 
  mutate(Annotations = "Male") %>% 
  rename(conf.low = `2.5%`, conf.high = `97.5%`,
         conf.low.99 = `0.05%`, conf.high.99 = `99.95%`)

### reshaping the output
output_tib_reaction <- bind_rows(pool_result_reaction_tidy_betaD,
                                 rep_result_reaction_tidy_betaD,
                                 dem_result_reaction_tidy_betaD,
                                 fem_result_reaction_tidy_betaD,
                                 male_result_reaction_tidy_betaD) %>% 
  filter(!(term %in% c("(Intercept)", "start_followers",
                       "tweet_hour")),
         !(str_detect(term, "as.character")),
         !(str_detect(term, "hashtag")),
         !(str_detect(term, "type_tweet"))) %>% 
  mutate(term_mod = case_when(str_detect(term, "_rep") ~ str_sub(term, 
                                                                 end = -5),
                              str_detect(term, "_dem") ~ str_sub(term, 
                                                                 end = -5),
                              str_detect(term, "_female") ~ str_sub(term, 
                                                                    end = -8),
                              str_detect(term, "_male") ~ str_sub(term, 
                                                                  end = -6),
                              TRUE ~ term)) %>% 
  mutate(term_nice = case_when(term_mod == "irony_avg" ~ "Irony",
                               term_mod == "humor_avg" ~ "Humor",
                               term_mod == "avg_enthus" ~ "Enthusiasm",
                               term_mod == "avg_sad" ~ "Sadness",
                               term_mod == "avg_disgust" ~ "Disgust",
                               term_mod == "avg_avers" ~ "Aversion",
                               term_mod == "avg_anxiety" ~ "Anxiety",
                               TRUE ~ term_mod)) %>% 
  mutate(term_nice = fct_relevel(term_nice,
                                 "Irony", "Humor")) %>% 
  mutate(Annotations = fct_relevel(Annotations,
                                   "Pooled",
                                   "Democrat",
                                   "Republican",
                                   "Female",
                                   "Male"))

## add indicator for significant or not
coef_differ_reaction <- output_tib_reaction %>% 
  mutate(sig_05 = as_factor(if_else(p<0.05, 1, 0)))

## plot it
ggplot(coef_differ_reaction, aes(term_nice, est)) +
  geom_point(aes(color = Annotations, 
                 shape = sig_05,
                 alpha = sig_05),
             position = position_dodge(width=0.4), 
             size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high,
                    color = Annotations,
                    alpha = sig_05), 
                width = 0.2, 
                size = 0.5,
                position = position_dodge(width=0.4)) +
  geom_hline(yintercept = 0,
             linetype="dotted") +
  coord_flip() +
  theme_classic() +
  scale_alpha_manual(values = c("0" = 0.4, "1" = 1)) +
  labs(alpha = "Significant (.05)", shape = "Significant (.05)") +
  ylab("Standardized regression coefficients \n predicting retweets (reactions only)") +
  xlab("Image variable") +
  theme(axis.text = element_text(size = 20),
        axis.title = element_text(size = 24),
        legend.title = element_text(size = 18),
        legend.text = element_text(size = 15),
        axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0)))

## Save it
ggsave(str_c(app_e, "appE_8_reactions_coeff_diff_std_logDV.pdf"), 
       width = 20, height = 20, units = "cm")
